import scrapy
from xiaohua.items import XiaohuaItem


class XiaohSpider(scrapy.Spider):
    """Crawl paginated campus-girl galleries on 521609.com and yield image items.

    Flow: list page -> gallery detail page (``getNextHtmlUrl``) -> each
    picture page (``getImgUrl``) -> one ``XiaohuaItem`` per image.
    """

    name = 'xiaoh'
    # allowed_domains = ['xiaohua.com']
    start_urls = ['http://www.521609.com/meinvxiaohua/']

    # Pagination state: next list-page number and the list-page URL template.
    page_num = 2
    url = 'http://www.521609.com/meinvxiaohua/list12%d.html'

    def parse(self, response):
        """Parse a list page: follow every gallery link, then the next list page."""
        li_list = response.xpath('//div[@id="content"]/div[2]/div[2]/ul/li')
        for li in li_list:
            # The title sits either directly in the second <a> or inside a nested <b>.
            content = li.xpath('./a[2]/text()|./a[2]/b/text()').extract_first()
            detail_url = li.xpath('./a[1]/@href').extract_first()
            if detail_url:
                yield response.follow(url=detail_url, callback=self.getNextHtmlUrl, meta={'title': content})

        # Walk list pages 2..11; page_num is class-level state shared across calls.
        if self.page_num <= 11:
            new_url = self.url % self.page_num
            self.page_num += 1
            yield response.follow(url=new_url, callback=self.parse)

    def getNextHtmlUrl(self, response):
        """Follow every picture page listed in a gallery's pagination bar."""
        li_list = response.xpath('//*[@id="content"]/div[2]/div[2]/div[4]/ol/li')
        content = response.meta['title']
        self.logger.debug('gallery title: %s', content)
        for li in li_list:
            page_url = li.xpath('./a[1]/@href').extract_first()
            if page_url:
                yield response.follow(url=page_url, callback=self.getImgUrl, meta={'title': content})

    def getImgUrl(self, response):
        """Extract the image src from a picture page and yield a populated item."""
        src = response.xpath('//div[@class="picbox"]/a/img/@src').extract_first()
        if not src:
            # Layout changed or image missing: skip instead of crashing on None concat.
            return
        item = XiaohuaItem()
        item['title'] = response.meta['title']
        item['src'] = "http://www.521609.com" + src
        yield item
